library(bigMap)
Run pt-SNE on Sierpinski-3d with perplexity 2030 (99% of the data set size) and a decreasing range of thread-ratio. We show:
- loss of accuracy with decreasing thread-ratio;
- improvement of the final embedding by means of a final compression of the embedding (EFC);
- using different perplexities in EFC enhances the structure at the corresponding scale while preserving the captured global structure;
# source aux. stuff (graph plot function)
source('../graphs.R')
# load edge matrix
E <- as.matrix(read.csv('../sierpinski3d.edg', sep = '', header = F))
Load data
D <- as.matrix(read.csv('../sierpinski3d.mtx', sep = '', header = F))
Compute betas
g <- bdm.init(D, dSet.name = 'S3D', is.distance = T, ppx = 2030, threads = 4)
Run ptSNE (perplexity = 2030, decreasing thread-ratio)
g1 <- bdm.ptsne(D, g, threads = 1, layers = 1)
g2 <- bdm.ptsne(D, g, threads = 3, layers = 2)
g3 <- bdm.ptsne(D, g, threads = 4, layers = 2)
g4 <- bdm.ptsne(D, g, threads = 5, layers = 2)
g5 <- bdm.ptsne(D, g, threads = 6, layers = 2)
g6 <- bdm.ptsne(D, g, threads = 8, layers = 2)
g.list <- list(g1, g2, g3, g4, g5, g6)
save(g.list, file = './s3d_2030.RData')
hl-Correlation
g.list <- lapply(g.list, function(g) bdm.hlCorr(D, g, threads = 4))
save(g.list, file = './s3d_2030.RData')
hlTable <- sapply(g.list, function(g) summary(g$hlC))
hlTable <- t(round(hlTable, 4))
threadRatio <- sapply(g.list, function(g) g$ptsne$layers /g$ptsne$threads)
rownames(hlTable) <- round(threadRatio, 2)
knitr::kable(hlTable, caption = 'hl-Correlation by thread-ratio') %>%
kable_styling(full_width = F)
hl-Correlation by thread-ratio
|
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
1
|
0.7383
|
0.7493
|
0.7547
|
0.7522
|
0.7576
|
0.7609
|
|
0.67
|
0.7403
|
0.7459
|
0.7477
|
0.7496
|
0.7515
|
0.7626
|
|
0.5
|
0.7463
|
0.7464
|
0.7509
|
0.7513
|
0.7559
|
0.7574
|
|
0.4
|
0.7373
|
0.7444
|
0.7502
|
0.7497
|
0.7555
|
0.7612
|
|
0.33
|
0.7549
|
0.7569
|
0.7579
|
0.7590
|
0.7599
|
0.7654
|
|
0.25
|
0.7402
|
0.7435
|
0.7463
|
0.7464
|
0.7491
|
0.7527
|
k-ary neighborhood preservation
g.list <- lapply(g.list, function(g) bdm.knp(D, g, threads = 4))
save(g.list, file = './s3d_2030.RData')
bdm.knp.plot(g.list, ppxfrmt = 0)

Running Times
rTimes <- sapply(g.list, function(g) c(g$ppx$t[3], g$t$epoch, g$t$ptsne[3], sum(c(g$ppx$t[3], g$t$ptsne[3]))))
rTimes <- round(rTimes, 2)
threadRatio <- sapply(g.list, function(g) g$ptsne$layers /g$ptsne$threads)
colnames(rTimes) <- round(threadRatio, 2)
rownames(rTimes) <- c('betas', 'epoch', 'ptSNE', 'total')
knitr::kable(rTimes, caption = 'Computation times (s) by thread-ratio') %>%
kable_styling(full_width = F)
Computation times (s) by thread-ratio
|
|
1
|
0.67
|
0.5
|
0.4
|
0.33
|
0.25
|
|
betas
|
0.38
|
0.38
|
0.38
|
0.38
|
0.38
|
0.38
|
|
epoch
|
1.89
|
0.86
|
0.54
|
0.48
|
0.40
|
0.29
|
|
ptSNE
|
62.85
|
27.84
|
17.01
|
15.12
|
12.41
|
9.02
|
|
total
|
63.22
|
28.21
|
17.39
|
15.50
|
12.79
|
9.39
|
Run on: Intel(R) Xeon(R) CPU E31225 @ 3.10GHz, 4 cores, 16GB RAM.
Embedding final compression (EFC)
# take the embedding with lowest thread-ratio
g.2030c <- g.list[6:6]
# compress with ppx = 1845 (0.90)
g.2030c <- bdm.efc(D, g.2030c, ppx = 1845, iters = 50, threads = 4)
# compress with ppx = 1025 (0.50)
g.2030c <- bdm.efc(D, g.2030c, ppx = 1025, iters = 50, threads = 4)
# compress with ppx = 205 (0.10)
g.2030c <- bdm.efc(D, g.2030c, ppx = 205, iters = 50, threads = 4)
# compress with ppx = 102 (0.05)
g.2030c <- bdm.efc(D, g.2030c, ppx = 102, iters = 50, threads = 4)
# compress with ppx = 20 (0.01)
g.2030c <- bdm.efc(D, g.2030c, ppx = 20, iters = 50, threads = 4)
save(g.2030c, file = './s3d_2030c.RData')
EFC output vs. output from Fig.1 (with thread-ratio=1.0)
# load embeddings with thread-ratio = 1.0 (from Fig.1)
load('../pt-SNE/glist.RData')
g.threadRatio1 <- g.list[c(13, 11, 7, 3, 2, 1)]
g <- g.threadRatio1[[1]]
graph.plot(g, E, title = 'ppx=2030, thread-ratio=1.0')
g <- g.2030c[[1]]
graph.plot(g, E, title = 'ppx=2030, thread-ratio=0.25')


nulL <- lapply(2:6, function(i) {
g <- g.threadRatio1[[i]]
graph.plot(g, E, title = paste('ppx=', g$ppx$ppx, ', thread-ratio=1.0', sep = ''))
g <- g.2030c[[i]]
graph.plot(g, E, title = paste('ppx=2030, thread-ratio=0.25, +efc.', g$ppx$ppx, sep = ''))
})










EFC hl-Correlation
g.2030c <- lapply(g.2030c, function(g) bdm.hlCorr(D, g, zSampleSize = 1000, threads = 4))
hlTable <- sapply(g.2030c, function(g) summary(g$hlC))
hlTable <- t(round(hlTable, 4))
efc.ppx <- paste('efc.', sapply(g.2030c, function(g) g$ppx$ppx), sep = '')
rownames(hlTable) <- efc.ppx
knitr::kable(hlTable, caption = 'hl-Correlation by EFC-perplexity') %>%
kable_styling(full_width = F)
hl-Correlation by EFC-perplexity
|
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
efc.2030
|
0.7513
|
0.7527
|
0.7544
|
0.7545
|
0.7561
|
0.7578
|
|
efc.1845
|
0.7342
|
0.7374
|
0.7418
|
0.7430
|
0.7474
|
0.7541
|
|
efc.1025
|
0.7215
|
0.7226
|
0.7264
|
0.7280
|
0.7318
|
0.7375
|
|
efc.205
|
0.7214
|
0.7273
|
0.7313
|
0.7301
|
0.7341
|
0.7364
|
|
efc.102
|
0.7162
|
0.7181
|
0.7199
|
0.7217
|
0.7235
|
0.7310
|
|
efc.20
|
0.7115
|
0.7159
|
0.7249
|
0.7244
|
0.7334
|
0.7366
|
EFC k-ary neighborhood preservation
g.2030c <- lapply(g.2030c, function(g) bdm.knp(D, g, k.max = NULL, sampling = 0.9, threads = 4))
bdm.knp.plot(g.2030c, ppxfrmt = 0)

Running Times
rTimes <- sapply(g.2030c, function(g) c(g$ppx$t[3], g$t$epoch, g$t$ptsne[3], g$t$efc[3], sum(c(g$ppx$t[3], g$t$ptsne[3], g$t$efc[3]))))
rTimes <- round(rTimes, 2)
colnames(rTimes) <- sapply(g.2030c, function(g) g$ppx$ppx)
rownames(rTimes) <- c('betas', 'epoch', 'ptSNE', 'EFC', 'total')
knitr::kable(rTimes, caption = 'Computation times (s) by EFC-perplexity') %>%
kable_styling(full_width = F)
Computation times (s) by EFC-perplexity
|
|
2030
|
1845
|
1025
|
205
|
102
|
20
|
|
betas
|
0.38
|
0.45
|
0.56
|
0.34
|
0.14
|
0.11
|
|
epoch
|
0.29
|
0.29
|
0.29
|
0.29
|
0.29
|
0.29
|
|
ptSNE
|
9.02
|
9.02
|
9.02
|
9.02
|
9.02
|
9.02
|
|
EFC
|
0.00
|
19.76
|
19.23
|
18.83
|
19.15
|
19.19
|
|
total
|
9.39
|
29.23
|
28.81
|
28.19
|
28.31
|
28.32
|
Run on: Intel(R) Xeon(R) CPU E31225 @ 3.10GHz, 4 cores, 16GB RAM.